User:ClueBot III/Source
Appearance
The following is automatically generated by ClueBot III.
- wikibot.classes.php is here.
Source to ClueBot III
<?php
error_reporting(error_reporting() & ~E_NOTICE);

/*
 * ClueBot III: archives talk-page sections according to the
 * User:<bot>/ArchiveThis template found on each page.
 *
 * TODO:
 *  Archive stats (size, number of topics, most recent, etc).
 */

declare(ticks = 1);

/**
 * Signal handler: reaps finished child processes on SIGCHLD.
 *
 * @param int $signo Signal number delivered by pcntl.
 */
function sig_handler($signo) {
    switch ($signo) {
        case SIGCHLD:
            // Collect every child that has already exited, without blocking.
            while (($x = pcntl_waitpid(0, $status, WNOHANG)) != -1) {
                if ($x == 0) break;
                $status = pcntl_wexitstatus($status);
            }
            break;
    }
}

pcntl_signal(SIGCHLD, "sig_handler");

include 'cluebot3.config.php';
include 'wikibot.classes.php';

/**
 * Splits wikitext into its sections at one heading level.
 *
 * A heading is a line that starts with exactly $level '=' characters and,
 * after trimming, ends with exactly $level '=' characters.
 *
 * @param string $d     Wikitext to split.
 * @param int    $level Heading level (number of '=' on each side).
 * @return array Element 0 is the text before the first heading.  Every other
 *               element is keyed by the trimmed heading text (with " 2",
 *               " 3", ... appended to duplicates) and holds
 *               array('header' => raw heading text, 'content' => body text).
 */
function splitintosections ($d,$level = 2) {
    $d = (string) $d;
    $level = (int) $level;
    // A level below 1 made the original scanner loop forever on empty lines.
    if ($level < 1) $level = 2;

    $len = strlen($d);
    $marker = str_repeat('=',$level);

    $sections = array();
    $header = '';
    $th = '';       // heading text of the section being collected
    $tb = '';       // body text collected so far
    $s = 0;         // becomes 1 once the first heading has been seen

    for ($i = 0; $i < $len; $i++) {
        $atlinestart = ($i == 0) || ($d[$i - 1] == "\n");
        if ($atlinestart
            && (substr($d,$i,$level) === $marker)
            && (($i + $level >= $len) || ($d[$i + $level] != '='))) {
            // Length of the current line, excluding its newline.
            $nl = strpos($d,"\n",$i);
            $j = ($nl === false) ? ($len - $i) : ($nl - $i);
            $line = trim(substr($d,$i,$j));

            if ((substr($line,-1 * $level,$level) === $marker)
                && (substr($line,(-1 * $level) - 1,1) != '=')) {
                if ($s == 1) $sections[] = array($th,$tb);
                else $header = $tb;
                $s = 1;
                $th = substr($line,$level,-1 * $level);
                $tb = '';
                // Resume at the newline that ends the heading line.
                $i += $j - 1;
                continue;
            }
            // Not a well-formed heading: fall through and keep the character
            // (the original silently dropped it, corrupting the page text).
        }
        $tb .= $d[$i];
    }

    if ($s == 1) $sections[] = array($th,$tb);
    else $header = $tb;

    $ret = array();
    $ret[] = $header;

    foreach ($sections as $section) {
        $base = trim($section[0]);
        $id = $base;
        $n = 1;
        while (isset($ret[$id])) {
            $n++;
            $id = $base.' '.$n;
        }
        $ret[$id] = array('header'=>$section[0],'content'=>$section[1]);
    }

    return $ret;
}

/**
 * Splits a page title into namespace and title.
 *
 * @param string $page Full page title.
 * @return array array(namespace, title).  When no recognised namespace prefix
 *               is present the namespace is '' and the title is $page itself
 *               (the original returned a null title in that case).
 */
function extractnamespace ($page) {
    if (preg_match('/^((user|wikipedia|image|mediawiki|template|help|category|portal)? ?(talk)?):(.*)$/i',$page,$m)) {
        return array($m[1],$m[4]);
    }
    return array('',$page);
}

/**
 * Maps a namespace name to its numeric id.
 *
 * @param string $namespace Namespace name; case and underscores are ignored.
 * @return int|null The id, or null when the name is not in the table.
 */
function namespacetoid ($namespace) {
    $convert = array (
        ''               => 0,
        'talk'           => 1,
        'user'           => 2,
        'user talk'      => 3,
        'wikipedia'      => 4,
        'wikipedia talk' => 5,
        'image'          => 6,
        'image talk'     => 7,
        'mediawiki'      => 8,
        'mediawiki talk' => 9,
        'template'       => 10,
        'template talk'  => 11,
        'help'           => 12,
        'help talk'      => 13,
        'category'       => 14,
        'category talk'  => 15,
        'portal'         => 100,
        'portal talk'    => 101
    );
    $name = strtolower(str_replace('_',' ',$namespace));
    return isset($convert[$name]) ? $convert[$name] : null;
}

/**
 * Archives the sections of $page that qualify, then repairs section links on
 * pages that link to $page and (unless disabled) regenerates the indices.
 *
 * A section is archived when it contains one of the $archivenow markers, or
 * when its text is identical to its text in the first revision found that is
 * at least $age hours old.
 *
 * @param string $page            Page to archive.
 * @param string $archiveprefix   Title prefix of the archive pages.
 * @param string $archivename     gmdate() format for the archive name; '%%i' is the archive number.
 * @param mixed  $age             Age threshold in hours.
 * @param mixed  $minarch         Minimum number of sections to archive in one run.
 * @param mixed  $minkeep         Minimum number of sections to leave on the page.
 * @param string $defaulthead     Text used to start a new archive page.
 * @param array  $archivenow      Marker strings that force archiving of a section.
 * @param mixed  $level           Heading level of the sections.
 * @param mixed  $noindex         1 to skip index generation.
 * @param mixed  $maxsects        Maximum sections to keep (0 = unlimited).
 * @param mixed  $maxbytes        Maximum bytes of section content to keep (0 = unlimited).
 * @param string $htransform      Heading transforms: "regex===replacement" pairs joined by "&&&".
 * @param mixed  $maxarchsize     Archive size above which the next numbered archive is used.
 * @param mixed  $archnumberstart First archive number.
 * @param string $key             Key authorising an archive prefix outside $page.
 * @return false|null false when the current revision cannot be read or an edit fails; null otherwise.
 */
function doarchive ($page,$archiveprefix,$archivename,$age,$minarch,$minkeep,$defaulthead,$archivenow,$level,$noindex,$maxsects,$maxbytes,$htransform,$maxarchsize,$archnumberstart,$key) {
    global $wpq;
    global $wpapi;

    // Use the same normalised level for splitting and for rebuilding the page.
    $level = (int) $level;
    if ($level < 1) $level = 2;
    $marker = str_repeat('=',$level);

    $rv = $wpapi->revisions($page,1,'older',true);
    if (!is_array($rv)) return false;

    // Timestamps handed to the page edit below; presumably used for edit-conflict detection.
    $wpStarttime = gmdate('YmdHis', time());
    $tmp = date_parse($rv[0]['timestamp']);
    $wpEdittime = gmdate('YmdHis', gmmktime($tmp['hour'],$tmp['minute'],$tmp['second'],$tmp['month'],$tmp['day'],$tmp['year']));
    unset($tmp);

    $cursects = splitintosections($rv[0]['*'],$level);

    // $ans holds the markers; $anr holds the neutralised {{tl|..}}/{{tlu|..}} form
    // written into the archive.  Empty markers are dropped: in PHP 8 strpos()
    // matches an empty needle everywhere, which would archive every section.
    $ans = array();
    $anr = array();
    foreach ($archivenow as $k => $v) {
        $v = trim($v);
        if ($v === '') {
            unset($archivenow[$k]);
            continue;
        }
        $archivenow[$k] = $v;
        $ans[] = $v;
        if (strpos($v,':') !== false) {
            $anr[] = str_replace('{{','{{tlu|',$v);
        } else {
            $anr[] = str_replace('{{','{{tl|',$v);
        }
    }

    // Walk the history, newest first, to the first revision at least $age hours old.
    $done = false;
    $lastrvid = null;
    while (!$done) {
        $rv = $wpapi->revisions($page,5000,'older',false,$lastrvid);
        if (!is_array($rv) || (count($rv) == 0)) break;
        foreach ($rv as $rev) {
            if (preg_match('/(\d+)\-(\d+)\-(\d+)T(\d+):(\d+):(\d+)/',$rev['timestamp'],$m)) {
                $time = gmmktime((int) $m[4],(int) $m[5],(int) $m[6],(int) $m[2],(int) $m[3],(int) $m[1]);
                if ((time() - $time) >= ($age * 60 * 60)) {
                    $done = true;
                    break;
                }
            }
        }
        // A short batch without a hit means the history is exhausted.
        if ((!isset($rv[4999])) and ($done == false)) break;
        $lastrvid = $rev['revid'];
        if (!$lastrvid) break;
    }

    if ($lastrvid == NULL) $tmp = array(array('*'=>''));
    else $tmp = $wpapi->revisions($page,1,'older',true,$lastrvid);
    $oldtext = (is_array($tmp) && isset($tmp[0]['*'])) ? $tmp[0]['*'] : '';
    $oldsects = splitintosections($oldtext,$level);

    $pageheader = $cursects[0];
    unset($cursects[0]);
    unset($oldsects[0]);

    $keepsects = array();
    $archsects = array();

    // Only sections that still exist are of interest.
    foreach ($oldsects as $id => $array) {
        if (!isset($cursects[$id])) {
            unset($oldsects[$id]);
        }
    }

    foreach ($cursects as $id => $array) {
        $an = false;
        foreach ($archivenow as $v) {
            if (strpos($array['content'],$v) !== false) $an = true;
        }

        if ((count($cursects) - count($archsects)) <= $minkeep) {
            $keepsects[$id] = $array;
        } elseif ($an == true) {
            $array['content'] = str_replace($ans,$anr,$array['content']);
            $archsects[$id] = $array;
        } elseif (preg_match('/\{\{User:ClueBot III\/DoNotArchiveUntil\|(\d+)\}\}/',$array['content'],$m) && time() < $m[1]) {
            $keepsects[$id] = $array;
        } elseif (!isset($oldsects[$id])) {
            $keepsects[$id] = $array;
        } elseif (trim($array['content']) === trim($oldsects[$id]['content'])) {
            // Strict comparison: with == two numeric-looking bodies such as
            // "1e3" and "1000" would count as unchanged.
            $archsects[$id] = $array;
        } else {
            $keepsects[$id] = $array;
        }
    }

    // Enforce the keep limits, counting from the bottom of the page upwards.
    if (($maxsects > 0) or ($maxbytes > 0)) {
        $i = 0;
        $b = 0;
        $keepsects = array_reverse($keepsects,true);
        foreach ($keepsects as $id => $array) {
            $i++;
            $b += strlen($array['content']);
            if (($maxsects > 0) and ($i > $maxsects)) {
                $archsects[$id] = $array;
                unset($keepsects[$id]);
            } elseif (($maxbytes > 0) and ($b > $maxbytes)) {
                $archsects[$id] = $array;
                unset($keepsects[$id]);
            }
        }
        $keepsects = array_reverse($keepsects,true);
    }

    if ($htransform != '') {
        // NOTE(review): the patterns come from the page's template, i.e. from wiki users.
        $search = array();
        $replace = array();
        foreach (explode('&&&',$htransform) as $v) {
            $v = explode('===',$v,2);
            $search[] = $v[0];
            $replace[] = isset($v[1]) ? $v[1] : '';
        }
        foreach ($archsects as $id => $array) {
            $newheader = preg_replace($search,$replace,$array['header']);
            // preg_replace() returns null for an invalid pattern; keep the heading then.
            if ($newheader !== null) $archsects[$id]['header'] = $newheader;
        }
    }

    // Debug dump of the section ids in each list.
    print_r(array(
        'oldsects'  => array_keys($oldsects),
        'cursects'  => array_keys($cursects),
        'keepsects' => array_keys($keepsects),
        'archsects' => array_keys($archsects)
    ));

    $archcount = count($archsects);
    if (($archcount > 0) and ($archcount >= $minarch)) {
        $plural = ($archcount > 1) ? 's' : '';

        $pdata = $pageheader;
        foreach ($keepsects as $array) {
            $pdata .= $marker.$array['header'].$marker.$array['content'];
        }

        // An archive prefix that does not start with the page title needs a valid key.
        if (substr(strtolower(str_replace('_',' ',$archiveprefix)),0,strlen($page)) !== strtolower($page)) {
            global $pass;
            $ckey = trim(md5(trim($page).trim($archiveprefix).trim($pass)));
            // Strict comparison: with != two different "0e<digits>" hashes compare equal.
            if (trim($key) !== $ckey) {
                echo 'Incorrect key and archiveprefix. $archiveprefix=\''.$archiveprefix.'\';$correctkey=\''.$ckey.'\';'."\n";
                $archiveprefix = $page.'/Archives/';
            }
        }

        if ($age == '99999') $age = 0;

        // Computed once so every candidate archive name uses the same date.
        $archtime = (int) (time() - ($age * 60 * 60));
        $i = $archnumberstart;
        $apage = $archiveprefix.gmdate(str_replace('%%i',$i,$archivename),$archtime);
        if (($maxarchsize > 10000) and (strpos($archivename,'%%i') !== false)) {
            // Advance to the first numbered archive that is not over the size limit.
            while (strlen((string) $wpq->getpage($apage)) > $maxarchsize) {
                $i++;
                $apage = $archiveprefix.gmdate(str_replace('%%i',$i,$archivename),$archtime);
            }
        }

        // $x keeps the archive's previous content for the rollback below.
        $adata = (($x = $wpq->getpage($apage))?$x:$defaulthead."\n")."\n";
        foreach ($archsects as $array) {
            $adata .= $marker.$array['header'].$marker.$array['content'];
        }

        if (!$wpapi->edit($apage,$adata,'Archiving '.$archcount.' discussion'.$plural.' from [['.$page.']]. (BOT)',true,true)) return false;
        if (!$wpapi->edit($page,$pdata,'Archiving '.$archcount.' discussion'.$plural.' to [['.$apage.']]. (BOT)',true,true,$wpStarttime,$wpEdittime)) {
            // Removing the sections from the page failed: restore the archive.
            $wpapi->edit($apage,$x,'Unarchiving '.$archcount.' discussion'.$plural.' from [['.$page.']]. (Archive failed) (BOT)',true,true);
            return false;
        }

        //NOFORK $pid = pcntl_fork();
        if (/*NOFORK:$pid == 0*/true) {
            // Rewrite [[Page#Section]] links on linking pages to point at the archive.
            $search = array();
            $replace = array();
            foreach ($archsects as $sectid => $sect) {
                $sectid = (string) $sectid;
                $newtitle = trim($sect['header']);
                $anchor = str_replace('%','.',urlencode(str_replace(' ','_',$sectid)));
                $newanchor = str_replace('%','.',urlencode(str_replace(' ','_',$newtitle)));
                $search[] = $page.'#'.$anchor;
                $replace[] = $apage.'#'.$newanchor;
                $search[] = $page.'#'.str_replace('.20','_',$anchor);
                $replace[] = $apage.'#'.str_replace('.20','_',$newanchor);
                $search[] = $page.'#'.$sectid;
                $replace[] = $apage.'#'.$newtitle;
            }

            // Fetch backlinks in batches of 500 until a short batch arrives.
            $pagelist = array();
            $continue = null;
            do {
                $bl = $wpapi->backlinks($page,500,$continue);
                if (!is_array($bl)) break;
                foreach ($bl as $link) {
                    $pagelist[] = $link['title'];
                }
            } while (count($bl) >= 500);

            print_r($search);
            print_r($replace);

            // Group the pages so each (optional) fork handles one group.
            $forktasklist = array();
            $count = 0;
            foreach ($pagelist as $title) {
                $count++;
                $group = (int) floor($count / 500);
                $forktasklist[$group][] = $title;
            }
            unset($pagelist);

            for ($i=0;$i<count($forktasklist);$i++) {
                //NOFORK $pid = pcntl_fork();
                if (/*NOFORK:$pid == 0*/true) {
                    foreach ($forktasklist[$i] as $title) {
                        $data = $wpq->getpage($title);
                        // A failed fetch must never lead to an edit.
                        if (!is_string($data)) continue;
                        $newdata = str_replace($search,$replace,$data);
                        if ($data !== $newdata) {
                            $wpapi->edit($title,$newdata,'Fixing links to archived content. (BOT)',true,true);
                        }
                    }
                    //NOFORK die();
                }
            }
            //NOFORK die();
        }
    }

    if ($noindex != 1)
        if (/*NOFORK:pcntl_fork() == 0*/true) {
            generateindex($page,$archiveprefix,$level);
            /*NOFORK:die();*/
        }
}

/**
 * Rebuilds the plain index and the master detailed index for a page's archives.
 *
 * Lists up to 500 pages starting with $archiveprefix, orders them by the
 * timestamp of their first revision, refreshes each archive's detailed index
 * and writes both index pages under the bot's user space.
 *
 * @param string $origpage      Page the archives belong to.
 * @param string $archiveprefix Title prefix of the archive pages.
 * @param int    $level         Heading level of the archived sections.
 */
function generateindex ($origpage,$archiveprefix,$level) {
    global $user;
    global $wpapi;

    $tmp = extractnamespace($archiveprefix);
    $array = $wpapi->listprefix($tmp[1],namespacetoid($tmp[0]),500);
    print_r($array);

    $data = '';
    $ddata = '{|class="wikitable sortable"'."\n".'! Order !! Header !! Start Date !! End Date !! Comments !! Size !! Archive'."\n";

    // Initialised so that asort() works when no archive exists yet.
    $newarray = array();
    if (is_array($array)) {
        foreach ($array as $page) {
            $tmp = $wpapi->revisions($page['title'],1,'newer');
            $newarray[$page['title']] = $tmp[0]['timestamp'];
        }
    }
    asort($newarray);

    foreach ($newarray as $page => $time) {
        $page = (string) $page;    // numeric titles become integer array keys
        $data .= '* [['.$page.'|'.str_replace($archiveprefix,'',$page).']]'."\n";
        generatedetailedindex($page,$level);
        $ddata .= '{{User:'.$user.'/Detailed Indices/'.$page.'}}'."\n";
    }
    $ddata .= '|}';

    var_dump($wpapi->edit('User:'.$user.'/Indices/'.$origpage,$data,'Setting index for [['.$origpage.']]. (BOT)'));
    var_dump($wpapi->edit('User:'.$user.'/Master Detailed Indices/'.$origpage,$ddata,'Setting detailed index for [['.$origpage.']]. (BOT)'));
}

/**
 * Builds the detailed index rows (one table row per section) of an archive
 * page and, unless $ret is set, saves them under the bot's user space.
 *
 * The saved index embeds an MD5 of the generator version and the archive
 * text; when the stored checksum still matches, nothing is regenerated.
 *
 * @param string      $apage Archive page title.
 * @param int         $level Heading level of the sections.
 * @param string|null $adata Archive text; fetched when null.
 * @param bool        $ret   When true the rows are only returned, not saved.
 * @return string|null The table rows, or null when the stored index is current.
 */
function generatedetailedindex ($apage,$level,$adata=null,$ret=false) {
    global $user;
    global $wpq;
    global $wpapi;

    $version = '1.1';
    if ($adata === null) $adata = $wpq->getpage($apage);
    $adata = (string) $adata;
    $checksum = md5(md5($version).md5($adata));

    $cdata = (string) $wpq->getpage('User:'.$user.'/Detailed Indices/'.$apage);
    if (preg_match('/\<\!-- CB3 MD5:([0-9a-f]{32}) --\>/i',$cdata,$m)) {
        // Strict comparison: with == two different "0e<digits>" hashes compare equal.
        if (trim(strtolower($m[1])) === trim(strtolower($checksum))) {
            return null;
        }
    }

    $months = array(
        'January' => 1, 'February' => 2, 'March' => 3, 'April' => 4,
        'May' => 5, 'June' => 6, 'July' => 7, 'August' => 8,
        'September' => 9, 'October' => 10, 'November' => 11, 'December' => 12
    );

    $sects = splitintosections($adata,$level);
    unset($sects[0]);

    $header = '<!-- CB3 MD5:'.trim($checksum).' -->'."\n".'{|class="wikitable sortable"'."\n".'! Order !! Header !! Start Date !! End Date !! Comments !! Size !! Archive'."\n";
    $footer = '|}';
    $data = '';
    $i = 1;

    foreach ($sects as $sect) {
        $data .= '|-'."\n".'| '.$i.' || '.trim($sect['header']).' || ';

        // Collect every signature timestamp in the section.
        $times = array();
        if (preg_match_all('/(\d{2}):(\d{2}), (\d+) ([a-zA-Z]+) (\d{4}) \(UTC\)/i',$sect['content'],$dates,PREG_SET_ORDER)) {
            foreach ($dates as $date) {
                // The pattern is case-insensitive and accepts any word, so
                // normalise the month name and skip words that are not months.
                $monthname = ucfirst(strtolower($date[4]));
                if (!isset($months[$monthname])) continue;
                $times[] = gmmktime((int) $date[1],(int) $date[2],0,$months[$monthname],(int) $date[3],(int) $date[5]);
            }
        }

        if (count($times) > 0) {
            sort($times,SORT_NUMERIC);
            $data .= gmdate('Y-m-d H:i',$times[0]).' || '.gmdate('Y-m-d H:i',$times[count($times)-1]).' || '.count($times);
        } else {
            $data .= 'Unknown || Unknown || Unknown';
        }

        $data .= ' || '.strlen($sect['content']).' || [['.$apage.'#'.str_replace(array('[[',']]',"'''","''",'{{','}}','|'),'',trim($sect['header'])).'|'.$apage.']]'."\n";
        $i++;
    }

    if (!$ret) $wpapi->edit('User:'.$user.'/Detailed Indices/'.$apage,'<noinclude>'.$header.'</noinclude>'.$data.'<noinclude>'.$footer.'</noinclude>','Updating detailed index for [['.$apage.']]. (BOT)');
    return $data;
}

/**
 * Finds every User:<bot>/ArchiveThis template on $page, parses its
 * parameters and runs doarchive() once per template.
 *
 * Parameter text inside <nowiki>...</nowiki> is taken literally.  A template
 * with once=1 is wrapped in an HTML comment on the page before archiving.
 *
 * @param string $page Page title.
 */
function parsetemplate ($page) {
    global $wpq;
    global $wpapi;
    global $user;

    $pagedata = (string) $wpq->getpage($page);

    // Byte offsets of every template occurrence.
    $positions = array();
    $x = 0;
    while (($x = stripos($pagedata,'{{user:'.$user.'/archivethis',$x)) !== false) {
        $positions[] = $x;
        $x++;
    }

    foreach ($positions as $pkey => $start) {
        $chunk = substr($pagedata,$start);
        $chunklen = strlen($chunk);

        $pos = 1;               // the first '{' is pre-loaded into $tmp below
        $depth = 1;             // brace depth; 0 once the template is closed
        $q = 0;                 // 1 while inside <nowiki>
        $part = 0;              // 0 = parameter name, 1 = parameter value
        $tmp = array('{');
        $params = array();

        while (($depth != 0) and ($pos < $chunklen)) {
            $c = $chunk[$pos];
            if (!isset($tmp[$part])) $tmp[$part] = '';
            $tmp[$part] .= $c;

            if (!$q) {
                if ($c == '{') $depth++;
                if ($c == '}') $depth--;
                if (($c == '|') or ($depth == 0)) {
                    // Drop the delimiter just appended ("}}" when closing).
                    if ($depth == 0) $tmp[$part] = substr($tmp[$part],0,-1);
                    $tmp[$part] = substr($tmp[$part],0,-1);
                    $part = 0;
                    if (!isset($tmp[1])) $params[] = $tmp[0];
                    else $params[strtolower(trim($tmp[0]))] = rtrim($tmp[1]);
                    $tmp = array();
                }
                if ($c == '=') {
                    // Only the first '=' separates name from value.
                    if ($part == 0) {
                        $tmp[$part] = substr($tmp[$part],0,-1);
                        $part = 1;
                    }
                }
                if (substr($chunk,$pos,8) == '<nowiki>') {
                    $tmp[$part] = substr($tmp[$part],0,-1);
                    $q = 1;
                    $pos += 7;
                }
            }
            if (substr($chunk,$pos,9) == '</nowiki>') {
                $tmp[$part] = substr($tmp[$part],0,-1);
                $q = 0;
                $pos += 8;
            }
            $pos++;
        }

        // Offset and byte length of the template in $pagedata.
        $positions[$pkey] = array($start,$pos);

        // The first positional entry is the template name itself.
        unset($params[0]);
        $set = $params;
        print_r($set);

        if ((isset($set['once'])?trim($set['once']):0) == 1) {
            // NOTE(review): this edit is built from the $pagedata fetched at the
            // top of the function, so on a page with several templates it would
            // write back text from before any archiving done for an earlier
            // template on the same page - confirm and re-fetch if so.
            $wpapi->edit($page,substr($pagedata,0,$positions[$pkey][0]).'<!-- '.substr($pagedata,$positions[$pkey][0],$positions[$pkey][1]).' -->'.substr($pagedata,$positions[$pkey][0]+$positions[$pkey][1]),'Commenting out config. (BOT)',true,true);
            sleep(3);
        }

        $archivenow = isset($set['archivenow'])?explode(',',$set['archivenow']):array('{{User:ClueBot III/ArchiveNow}}');

        $args = array(
            $page,
            (isset($set['archiveprefix'])?$set['archiveprefix']:''),
            (isset($set['format'])?$set['format']:''),
            (isset($set['age'])?$set['age']:0),
            (isset($set['minarchthreads'])?$set['minarchthreads']:0),
            (isset($set['minkeepthreads'])?$set['minkeepthreads']:0),
            (isset($set['header'])?$set['header']:'{{Talkarchive}}'),
            $archivenow,
            (isset($set['headerlevel'])?$set['headerlevel']:2),
            (isset($set['nogenerateindex'])?$set['nogenerateindex']:0),
            (isset($set['maxkeepthreads'])?$set['maxkeepthreads']:0),
            (isset($set['maxkeepbytes'])?$set['maxkeepbytes']:0),
            (isset($set['transformheader'])?$set['transformheader']:''),
            (isset($set['maxarchsize'])?$set['maxarchsize']:0),
            (isset($set['numberstart'])?$set['numberstart']:1),
            (isset($set['key'])?$set['key']:'')
        );

        // Log the call; the marker list is printed joined with commas.
        $shown = $args;
        $shown[7] = implode(',',$archivenow);
        echo 'doarchive('.implode(',',$shown).")\n";

        if ($pkey > 0) sleep(2);

        call_user_func_array('doarchive',$args);
    }
}

$wpq = new wikipediaquery;
$wpi = new wikipediaindex;
$wpapi = new wikipediaapi;

$wpapi->login($user,$pass);

$wpapi->edit('User:'.$user.'/Source',
    'The following is automatically generated by [[User:'.$user.'|'.$user."]].\n\n\n\n"
    . '* wikibot.classes.php is [[User:ClueBot/Source|here]].'
    . "\n\n\n\n==Source to ".$user."==\n\n"
    . '<pre>'.htmlentities(file_get_contents(__FILE__))."</pre>\n\n\n\n~~~~",
    'Automated source upload.'); /* Our source code, we force post this because this is *our* page, and it triggers the nobots. */

while (1) {
    //NOFORK $pid = pcntl_fork();
    if (/*NOFORK:$pid == 0*/true) {
        // Collect every page that transcludes the ArchiveThis template,
        // fetching in batches of 500 until a short batch arrives.
        $titles = array();
        $continue = null;
        do {
            $ei = $wpapi->embeddedin('User:'.$user.'/ArchiveThis',500,$continue);
            if (!is_array($ei)) break;
            foreach ($ei as $data) {
                $titles[] = $data['title'];
            }
        } while (isset($ei[499]));

        foreach ($titles as $title) {
            parsetemplate($title);
        }
        //NOFORK die();
    }

    // Wait six hours (was 3600 seconds) between runs, in one-second sleeps.
    $time = time();
    while ((time() - $time) < 21600) {
        sleep(1);
    }
}
?>
ClueBot III (talk) 17:49, 19 June 2015 (UTC)